
import glob
import re
import string
import pepe.p_txt.bigtxt as bt
import pepe.p_filesystem.basic as pfb

def fslist(x, y):
    """Call ``pfb.fslist`` on *x* with ``patterns=y``.

    The call is always made with ``recurse=True`` and
    ``return_folders=False``; whatever ``pfb.fslist`` returns is passed
    straight back to the caller.
    """
    return pfb.fslist(x, patterns=y, recurse=True, return_folders=False)

# Script body: walk every '*.txt' file found by fslist and route each of its
# lines into one of three output files, recording the processed paths in a
# fourth file.
#
#   dropped.dat          lines starting with one of drop_prefixes
#   checksums.dat        lines matching checksum_row, tagged with source path
#   output.aaa           every other line, tagged with source path
#   processed_files.dat  one processed path per line

crlf = '\x0D\x0A'
files = fslist(r"""U:\...""" + '\x5C', '*.txt')

# File names (last backslash-separated path component) that must be skipped.
# Only the empty name is listed; add further names here to exclude them.
excluded_names = ('',)

# A line starting with any of these literal prefixes is written to
# dropped.dat (presumably report header/footer boilerplate -- TODO confirm).
drop_prefixes = (
    'Hewlett-Packard ISE GmbH',
    'Dornach',
    '---------------------',
    '|       Document header',
    '|     number     ty date',
    'SAP AG',
    'Walldorf',
    'no data available for',
    '|CoCd Document   Do Posting',
)

# A line that opens with '|', exactly 18 arbitrary characters and another '|'
# is written to checksums.dat.  Compiled once instead of once per line.
checksum_row = re.compile(r'^\|.{18}\|')

filescnt = 0
opened = []
try:
    # Binary mode so the explicit CRLF terminators are written as-is.
    # Each handle is registered right after it is opened so that the
    # finally clause closes whatever was opened, even if a later open()
    # or the processing loop raises.
    o = open('output.aaa', 'wb')
    opened.append(o)
    p = open('processed_files.dat', 'wb')
    opened.append(p)
    d = open('dropped.dat', 'wb')
    opened.append(d)
    c = open('checksums.dat', 'wb')
    opened.append(c)

    for f in files:
        filescnt += 1
        print(f)

        fname = f.split('\x5C')[-1]
        if fname in excluded_names:
            continue

        print(f)
        # Terminate each path so the log holds one path per line instead
        # of a single run of concatenated paths.
        p.write(f + crlf)

        for line in bt.lines(f):
            line = line.replace(crlf, '')

            if line.startswith(drop_prefixes):
                d.write(line + crlf)
            elif checksum_row.match(line):
                c.write(line + '\t' + f + crlf)
            else:
                o.write(line + '\t' + f + crlf)
finally:
    for handle in opened:
        handle.close()
